#include "statsxx/machine_learning/NeuralNet.hpp"

// STL
#include <iostream>

// stats++
#include "statsxx/machine_learning/activation_functions.hpp" // activation_function::Logistic, activation_function::softplus


// Compute the Jacobian of the network outputs with respect to the raw inputs.
//
// @param input  one input vector (length m_ninp) at which to differentiate
// @return       matrix J with J[j][i] = d(output j)/d(input i), sized
//               m_out_neurons.size() x m_ninp
//
// Side effect: re-evaluates the network on `input` (via evaluate()) so that
// each neuron holds the activations needed by the per-neuron deriv() call.
//
// NOTE(review): `inline` on a definition in a .cpp normally risks undefined
// references from other translation units — confirm the header declaration
// is also marked inline, or drop the keyword.
inline std::vector<std::vector<double>> NEURAL_NET::derivs(std::vector<double> input)
{
    std::vector<std::vector<double>> derivs;
    derivs.reserve(m_out_neurons.size()); // final size is known up front

    // load in the input to the neural net
    // << jmm: this probably could be extended to recurrent networks, treating each variable at each instant as an independent variable ... though special care would probably need to be taken with if( neuron_idx == inp_idx ) ... below >>
    std::vector<std::vector<double>> inp2(1, input);
    std::vector<double> tmp_out;
    this->evaluate(inp2, tmp_out); // forward pass; populates per-neuron state used by deriv()

    /*
    // reset the network
    this->clear_memory();

    // perform forward propagation to get the output at each neuron
    this->forward_prop(input);
    */

    // calculate the derivative of each output neuron w.r.t. each input
    for( decltype(m_out_neurons.size()) j = 0; j < m_out_neurons.size(); ++j )
    {
        std::vector<double> deriv;
        deriv.reserve(m_ninp); // one entry per network input

        for( int i = 0; i < m_ninp; ++i )
        {
            double dx = this->deriv(m_neurons[m_out_neurons[j]].m_ID, i);

            // NOTE: because jNEURALNET is moving away from any preprocessing, it is up to the user to appropriately apply any chain rule (below)

            // we want to calculate dz/dx, but because of scaling we have access to dz/dx' ...
            // ... using the chain rule, (dz/dx) = (dx'/dx)*(dz/dx')
            //dx /= m_inp_stddev[i];

            // also because of scaling, we don't calculate dz/dx, but rather dz'/dx ...
            // ... it can easily be seen that dz/dx = (ymax-ymin)*dz'/dx
            //dx *= (m_out_max[j] - m_out_min[j]);

            deriv.push_back( dx );
        }

        derivs.push_back( std::move(deriv) ); // avoid copying the inner row
    }

    return derivs;
}
